import scrapy


class CoreSpider(scrapy.Spider):
    """Spider that prints the image titles listed in the 521609.com gallery.

    The crawl starts at the gallery index page and then requests the
    numbered listing pages built from ``url`` one after another, from
    ``num_page`` up to and including ``max_page``.
    """

    name = 'core'
    # allowed_domains = ['www.521609.com/tuku/']
    # Starting URL (entry point of the crawl).
    start_urls = ['http://www.521609.com/tuku/']
    # Generic URL template for the numbered listing pages.
    url = 'http://www.521609.com/tuku/index_{}.html'
    # Number of the next listing page to request.
    num_page = 2
    # Last listing page to request (inclusive).
    max_page = 51

    def parse(self, response):
        """Print every image title on the page, then schedule the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: A request for the next numbered listing page,
            handled by this same method, while ``num_page`` has not gone
            past ``max_page``.
        """
        # Image titles on the current page.
        for li in response.xpath('/html/body/div[4]/div[3]/ul/li'):
            # extract_first() returns None instead of raising IndexError when
            # an entry has no title node; an exception here would abort the
            # generator before the next-page request below is yielded.
            img_name = li.xpath('./a/p/text()').extract_first()
            if img_name is None:
                self.logger.debug(
                    'Skipping entry without a title on %s', response.url
                )
                continue
            print(img_name)

        # Remaining listing pages.
        if self.num_page <= self.max_page:
            new_url = self.url.format(self.num_page)
            self.num_page += 1
            # Issue the request manually; the callback does the parsing.
            yield scrapy.Request(url=new_url, callback=self.parse)


        